Loading packages for the plots
library(ggplot2)
library(plotly)
library(flexdashboard)
library(dplyr)
library(tidyverse)
library(leaflet)
Reading in Cleaned Data
# Raw 2007 CSV extracts, read from ./data relative to the working directory.
alcohol_data_2007 <- read_csv("./data/PRAM_2007_alcohol.csv")
# NOTE: readr reported "One or more parsing issues" for the tobacco file in a
# previous render; inspect them with problems(tobacco_data_2007).
tobacco_data_2007 <- read_csv("./data/PRAM_2007_tobacco.csv")
no_contraception_data_2007 <- read_csv("./data/PRAM_2007_no_contraception.csv")
infant_mortality_df <- read_csv("./data/PRAM_2007_infantmortality.csv")
# cleaned alcohol data
# Alcohol records: exclude the three response categories listed below, drop
# rows without a response or geolocation, then split geolocation on ", " and
# strip the parentheses so latitude/longitude become numeric columns.
cleaned_alc_2007 <- alcohol_data_2007 |>
  janitor::clean_names() |>
  select(!c(data_value_std_err, data_value_type)) |>
  filter(!response %in% c("DRINKER WHO QUIT", "NONDRINKER", "NO")) |>
  drop_na(response, geolocation) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  mutate(
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )
# cleaned tobacco data
# Exclude the four response categories listed below, drop rows without a
# response or geolocation, then split geolocation on ", " and strip the
# parentheses so latitude/longitude become numeric columns.
# NOTE: a previous render warned that one row (13248) did not split into two
# pieces, so its coordinates end up NA after this pipeline.
cleaned_tobac_2007 <- tobacco_data_2007 |>
  janitor::clean_names() |>
  select(-data_value_type) |>
  filter(
    !response %in% c("SMOKER WHO QUIT", "NONSMOKER", "None (0 cig)", "NO")
  ) |>
  drop_na(response, geolocation) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  mutate(
    latitude = as.numeric(str_replace_all(latitude, "\\(|\\)", "")),
    longitude = as.numeric(str_replace_all(longitude, "\\(|\\)", ""))
  )
# Remaining raw 2007 CSV extracts used by the sections below.
no_alcohol_data_2007 <- read_csv("./data/PRAM_2007_no_alcohol.csv")
no_tobacco_data_2007 <- read_csv("./data/PRAM_2007_no_tobacco.csv")
contraception_data_2007 <- read_csv("./data/PRAM_2007_contraception.csv")
# cleaned no alcohol data
# Standardise the column names, drop the standard-error, geolocation and
# value-type columns, and keep only rows that have a response.
cleaned_no_alc_2007 <- no_alcohol_data_2007 |>
  janitor::clean_names() |>
  select(-data_value_std_err, -geolocation, -data_value_type) |>
  drop_na(response)
# cleaned no tobacco data
# Same shape as the no-alcohol table: snake_case names, three unused columns
# removed, rows without a response discarded.
cleaned_no_tobacco_2007 <- no_tobacco_data_2007 |>
  janitor::clean_names() |>
  select(!c(data_value_std_err, geolocation, data_value_type)) |>
  filter(!is.na(response))
# cleaned infant mortality
# Drop the bookkeeping columns and any row missing a response or geolocation,
# then split geolocation on ", " and strip the parentheses so that
# latitude/longitude are numeric.
cleaned_infant_mortality <- infant_mortality_df |>
  janitor::clean_names() |>
  select(
    !c(
      data_value_std_err,
      data_value_type,
      data_value_unit,
      data_value_footnote_symbol,
      data_value_footnote
    )
  ) |>
  drop_na(response, geolocation) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  mutate(
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )
# cleaned contraception
# Remove three unused columns, exclude the "YES (CHECKED)" and "YES"
# responses, and discard rows without a response.
cleaned_contraception_2007 <- contraception_data_2007 |>
  janitor::clean_names() |>
  select(!c(data_value_std_err, geolocation, data_value_type)) |>
  filter(!response %in% c("YES (CHECKED)", "YES")) |>
  drop_na(response)
# cleaned no contraception
# Drop the value-type column and rows without a response, then split
# geolocation on ", " and strip the parentheses so latitude/longitude are
# numeric. Rows with a missing geolocation are kept (their coordinates are NA).
cleaned_no_contra_2007 <- no_contraception_data_2007 |>
  janitor::clean_names() |>
  select(!data_value_type) |>
  filter(!is.na(response)) |>
  separate(
    geolocation,
    into = c("latitude", "longitude"),
    sep = ", ",
    convert = TRUE
  ) |>
  mutate(
    across(
      c(latitude, longitude),
      \(coord) as.numeric(str_replace_all(coord, "\\(|\\)", ""))
    )
  )
Plot 1: Alcohol Consumption in relation to Infant Mortality
# NOTE(review): this reassigns cleaned_infant_mortality. Unlike the earlier
# definition it keeps the raw geolocation column and only requires a response
# -- confirm that overwriting the earlier table is intended.
cleaned_infant_mortality <- infant_mortality_df |>
  janitor::clean_names() |>
  select(
    !c(
      data_value_std_err,
      data_value_type,
      data_value_unit,
      data_value_footnote_symbol,
      data_value_footnote
    )
  ) |>
  filter(!is.na(response))
# Plot of question and responses for alcohol
# Dodged bar chart: one group of bars per question, one bar per response.
# geom_bar() counts rows, so the y axis is a count of records.
cleaned_alc_2007 |>
  ggplot(aes(x = question, fill = response)) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  # Question text is long; angle the tick labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Question",
    y = "Count",
    fill = "Response",
    title = "Questions vs Response of Alcohol Consumption"
  )

# creating "yes" variable
# plot showing infant mortality rate vs alcohol consumption
# Overlays the question/response pairs of both tables. Colour is mapped inside
# aes() so that ggplot draws a legend identifying each dataset; the manual
# scale keeps the blue/red colours.
ggplot() +
  geom_point(
    data = cleaned_alc_2007,
    aes(x = question, y = response, color = "Alcohol consumption"),
    size = 3
  ) +
  geom_point(
    data = cleaned_infant_mortality,
    aes(x = question, y = response, color = "Infant mortality"),
    size = 3
  ) +
  scale_color_manual(
    name = "Dataset",
    values = c("Alcohol consumption" = "blue", "Infant mortality" = "red")
  ) +
  labs(
    title = "Alcohol Consumption and Infant Mortality: Questions vs Responses",
    x = "Question",
    y = "Response"
  ) +
  theme_minimal()

Plot 2: Tobacco Consumption in relation to Infant Mortality
Plot 3: No Consumption in relation to Infant Mortality
# Interactive map of the cleaned alcohol records: one orange circle per row.
leaflet() |>
  addTiles() |>
  addCircleMarkers(
    data = cleaned_alc_2007,
    lng = ~longitude,
    lat = ~latitude,
    label = ~location_abbr, # shown on hover
    radius = 7,
    color = "orange",
    stroke = TRUE,
    fillOpacity = 0.75,
    popup = ~paste("Response:", response) # shown on click
  )
# Interactive map of the cleaned tobacco records: one orange circle per row.
# Rows without usable coordinates are dropped up front; leaflet would skip them
# anyway, but with a "missing or invalid lat/lon values" warning.
leaflet() |>
  addTiles() |>
  addCircleMarkers(
    data = drop_na(cleaned_tobac_2007, latitude, longitude),
    lng = ~longitude,
    lat = ~latitude,
    label = ~location_abbr, # shown on hover
    radius = 7,
    color = "orange",
    stroke = TRUE,
    fillOpacity = 0.75,
    popup = ~paste("Response:", response) # shown on click
  )